Stochastic Gradient Descent - Machine Learning 2015-2016

Author: Afentoulidis Gregory
Student ID : 4521862

Table of contents

Introduction

A plot of the loss function and finding the global minimum

In [1]:
# import packages
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (10, 6)
import numpy as np
import matplotlib.pyplot as plt
import bokeh.plotting as bp
import matplotlib.animation as animation
from bokeh.models import WheelZoomTool,ResetTool,PanTool
from JSAnimation import IPython_display

# Default bokeh figure dimensions (pixels) used throughout the notebook.
W = 590
H = 350
# Render bokeh output inline in the notebook instead of writing to a file.
bp.output_notebook()
Populating the interactive namespace from numpy and matplotlib
Loading BokehJS ...
In [2]:
# Plot the loss function f(x) = x^4 - 2x^2 + 2x on [-2, 2] and mark the
# global minimum of the sampled grid (500 points).
x = np.linspace(-2., 2., 500)
y = x**4 - 2*x**2 + 2*x
i = np.argmin(y)  # index of the smallest sampled loss value
# Fix: message previously read "minimun".
print("Global minimum of loss function is at point (%.2f,%.2f)" % (x[i], y[i]))
fig = plt.figure()
ax = plt.axes()
plt.plot(x, y)
plt.title("Plot of the loss function $x^{4}-2x^{2}+2x$")
plt.grid(True)
# Annotate the minimum; text is offset upward so the arrow stays visible.
ax.annotate('Min @ ' + str(round(x[i], 2)), xy=(x[i], y[i]),
            xytext=(x[i], y[i] + 2.5),
            arrowprops=dict(facecolor='red', shrink=0.02))
Global minimum of loss function is at point (-1.19,-3.21)
Out[2]:
<matplotlib.text.Annotation at 0x7f68044b5dd0>

Testing interactive plots

In [3]:
# Quadratic x^2 - 6x + 5 (minimum at (3, -4)) rendered as an interactive
# bokeh figure with zoom / pan / reset tools.
# NOTE: `x` and `y` are reused by the animation cell further down.
x = np.linspace(-15, 21, 100)
y = x**2 - 6*x + 5

fig_quad = bp.figure(width=W, plot_height=H,
                     title='Local minimum of function',
                     tools=[WheelZoomTool(), ResetTool(), PanTool()])
fig_quad.line(x, y, color='navy', alpha=0.5, line_width=2)
fig_quad.circle(3, -4, size=10, color='orange')  # highlight the minimum
fig_quad.title_text_font_size = '16pt'
fig_quad.yaxis.axis_label_text_font_size = '14pt'
fig_quad.xaxis.axis_label_text_font_size = '14pt'

bp.show(fig_quad)
Out[3]:

<Bokeh Notebook handle for In[3]>

In [7]:
# Plain gradient descent on g(x) = x^2 - 6x + 5 (minimizer x* = 3).
# Iterate x <- x - eta * g'(x) until two successive iterates differ by
# less than `precision`, recording every iterate and its squared error.
old_min = 0
temp_min = 15       # starting point of the descent
step_size = 0.01    # learning rate eta
precision = 0.001   # convergence threshold on |x_k - x_{k-1}|

def f_derivative(x):
    """Derivative of x^2 - 6x + 5."""
    return 2*x -6

mins = []   # iterate history
cost = []   # squared distance to the true minimizer x* = 3

while True:
    if abs(temp_min - old_min) <= precision:
        break  # converged
    old_min = temp_min
    temp_min = old_min - f_derivative(old_min) * step_size
    cost.append((3-temp_min)**2)
    mins.append(temp_min)

# Animation scaffolding: `line` (created below) is redrawn frame by frame.
def init():
    # Start each animation pass with an empty marker.
    line.set_data([],[])
    return line,

def animate(i):
    # Show every 10th recorded gradient-descent iterate as one frame.
    x_n = mins[0::10][i]
    y_n = x_n**2-6*x_n+5
    line.set_data(x_n,y_n)
    return line,

fig = plt.figure()
ax = plt.axes(xlim=(-15,21),ylim=(-50,350))
# NOTE(review): `x` and `y` come from the earlier bokeh cell
# (np.linspace(-15,21,100), x**2-6*x+5) — hidden cross-cell state; this
# cell fails if that one was not run first.
ax.plot(x,y,linewidth=3)
line, = ax.plot([],[],"D",markersize=12)
animation.FuncAnimation(fig,animate,init_func=init,
                       frames=len(mins[0::10]),interval=200)
Out[7]:


Once Loop Reflect

Exercise A

The getGrad function takes a point and a number of samples, and outputs an array of gradient values with Gaussian noise drawn from a standard normal distribution, based on the formula:

$f'(x)+\mathcal{N}(0,1)$
In [32]:
def getGrad(p,n):
    """Return n noisy samples of f'(p) for f(x) = x^4 - 2x^2 + 2x.

    Each sample is the exact derivative 4p^3 - 4p + 2 plus standard
    normal noise, i.e. f'(p) + N(0, 1).
    """
    exact = 4*p**3-4*p+2
    # Broadcasting the scalar over the noise vector replaces the
    # original np.ones(n) * value construction; values are identical.
    return exact + np.random.normal(loc=0,scale=1,size=n)

def batchGradientDescent(init,sampleSize,iterations,learningRate=0.1):
    """Batch gradient descent: each step averages `sampleSize` noisy gradients.

    Returns an array of length iterations+1 holding the starting point
    followed by every subsequent iterate.
    """
    ws = np.zeros(iterations+1)
    ws[0] = init
    for step in range(iterations):
        noisy_grads = getGrad(ws[step], sampleSize)
        ws[step + 1] = ws[step] - learningRate * noisy_grads.mean()
    return ws

def miniBatchGradientDescent(init,sampleSize,iterations,
                             learningRate=0.1,batch=1):
    """Mini-batch gradient descent.

    Draws `sampleSize` noisy gradients per step but averages only a
    random subsample of size `batch` (batch=1 gives online/SGD).
    Returns an array of iterations+1 iterates, starting with `init`.
    """
    ws = np.zeros(iterations+1)
    ws[0] = init
    for step in range(iterations):
        noisy_grads = getGrad(ws[step], sampleSize)
        minibatch = np.random.choice(noisy_grads, size=batch)
        ws[step + 1] = ws[step] - learningRate * minibatch.mean()
    return ws
In [81]:
# One 100-iteration epoch for each gradient-descent flavour, all
# starting from w = 1.5 with learning rate 0.1.
resBatch = batchGradientDescent(1.5,100,100,learningRate=0.1)
resMini = miniBatchGradientDescent(1.5,100,100,batch=10,learningRate=0.1)
resOnline = miniBatchGradientDescent(1.5,100,100,batch=1,learningRate=0.1)
# Loss = distance to the sampled global minimizer x ~= -1.19 found in the
# first plotting cell above.
lossBatch = np.abs(resBatch-(-1.19))
lossMini = np.abs(resMini-(-1.19))
lossOnline = np.abs(resOnline-(-1.19))
In [115]:
# Compare the three descent variants' distance-to-optimum per iteration.
it = np.arange(1, 102)  # 101 points: the initial w plus 100 updates
plt.plot(it, lossBatch, linewidth=2.)
plt.plot(it, lossMini, linewidth=2.)
plt.plot(it, lossOnline, linewidth=2.)
lines = plt.gca().get_lines()
plt.legend(lines, ['Batch_Gradient_Loss',
                   'MiniBatch_Gradient_Loss',
                   'Online_Gradient_Loss'])
plt.title('Loss of gradient versions in one epoch', color='navy')
# Fix: the axis labels were swapped — x is the iteration count, y the loss.
plt.xlabel('Iterations', color='navy')
plt.ylabel('Loss', color='navy')
plt.grid(True)

Graph of the three gradient descent versions with interactive tools

In [121]:
# Same comparison as the matplotlib figure above, but interactive (bokeh).
s1 = bp.figure(width=W, plot_height=H,
               title='Loss of gradient versions in one epoch',
               tools=[WheelZoomTool(), ResetTool(), PanTool()])
# One line per descent variant; colours match the matplotlib defaults used
# earlier (batch=navy, mini-batch=green, online=red).
for series, colour, label in ((lossBatch, 'navy', 'Batch_Gradient_Loss'),
                              (lossMini, 'green', 'Mini_Batch_Gradient_Loss'),
                              (lossOnline, 'red', 'Online_Gradient_Loss')):
    s1.line(it, series, line_color=colour, legend=label, line_width=2.)
s1.xaxis.axis_label = "Iterations/One Epoch"
s1.yaxis.axis_label = "Loss"
s1.title_text_font_size = '16pt'
s1.yaxis.axis_label_text_font_size = '14pt'
s1.xaxis.axis_label_text_font_size = '14pt'

bp.show(s1)
Out[121]:

<Bokeh Notebook handle for In[121]>

Animated plot of Batch Gradient Descent Loss for 1 epoch

In [110]:
fig = plt.figure()
ax = plt.axes(xlim=(0,102),ylim=(floor(lossBatch.min()),ceil(lossBatch.max())))
line, = ax.plot([],[],lw=2.,color='navy')
plt.grid(True)
x=[]
y=[]
n=10
def init():
    line.set_data([],[])
    return line,

#lossBatch (y) and iterations(x)
def animate(i):
    x.append(np.linspace(i,i+1,n))
    y.append(np.linspace(lossBatch[i],lossBatch[i+1],n))
    line.set_data(x,y)
    return line,

animation.FuncAnimation(fig,animate,np.arange(0,len(it)-1),
                        init_func=init,
                        interval=50,blit=True,repeat=False)
Out[110]:


Once Loop Reflect

Animated plot of Mini-Batch Gradient Descent Loss for 1 epoch

In [123]:
fig = plt.figure()
ax = plt.axes(xlim=(0,102),ylim=(floor(lossMini.min()),ceil(lossMini.max())))
line, = ax.plot([],[],lw=2.,color='green')
plt.grid(True)
x=[]
y=[]
n=10
def init():
    line.set_data([],[])
    return line,

#lossBatch (y) and iterations(x)
def animate(i):
    x.append(np.linspace(i,i+1,n))
    y.append(np.linspace(lossMini[i],lossMini[i+1],n))
    line.set_data(x,y)
    return line,

animation.FuncAnimation(fig,animate,np.arange(0,len(it)-1),
                        init_func=init,
                        interval=50,blit=True,repeat=False)
Out[123]:


Once Loop Reflect

Animated plot of Online Gradient Descent Loss for 1 epoch

In [124]:
fig = plt.figure()
ax = plt.axes(xlim=(0,102),ylim=(floor(lossOnline.min()),ceil(lossOnline.max())))
line, = ax.plot([],[],lw=2.,color='red')
plt.grid(True)
x=[]
y=[]
n=10
def init():
    line.set_data([],[])
    return line,

#lossBatch (y) and iterations(x)
def animate(i):
    x.append(np.linspace(i,i+1,n))
    y.append(np.linspace(lossOnline[i],lossOnline[i+1],n))
    line.set_data(x,y)
    return line,

animation.FuncAnimation(fig,animate,np.arange(0,len(it)-1),
                        init_func=init,
                        interval=50,blit=True,repeat=False)
Out[124]:


Once Loop Reflect

Exercise B

In [122]:
# NOTE(review): displaying the full 101-element array bloats the notebook;
# consider lossBatch[:10] or a summary statistic instead.
lossBatch
Out[122]:
array([  2.69000000e+00,   1.71201362e+00,   1.67200656e+00,
         1.62539763e+00,   1.56218098e+00,   1.49525405e+00,
         1.41002303e+00,   1.28849576e+00,   1.10826831e+00,
         8.67954609e-01,   5.64491148e-01,   2.09157063e-01,
         1.55321491e-03,   1.39509832e-02,   2.24342664e-02,
         1.32234281e-02,   3.04033672e-03,   1.30209109e-02,
         4.04345670e-03,   3.98387409e-03,   5.65076132e-03,
         4.91932532e-03,   3.38887254e-04,   1.36081309e-02,
         1.45743052e-02,   7.53220316e-03,   1.56871258e-02,
         7.40065351e-03,   7.94869961e-03,   1.11279896e-03,
         3.87775244e-03,   1.26954696e-02,   1.15686239e-02,
         1.65572674e-02,   9.10554631e-03,   1.88693516e-02,
         1.14024796e-02,   5.22225495e-03,   3.40603132e-03,
         2.10559893e-04,   1.98750586e-03,   3.06642971e-03,
         1.76138810e-02,   1.95132483e-03,   5.32023730e-03,
         8.64065450e-03,   2.12068964e-03,   6.51877236e-03,
         1.87212285e-02,   6.31141906e-03,   8.71904449e-03,
         1.17849283e-02,   4.52898198e-03,   1.76666046e-02,
         1.45631365e-02,   1.85810963e-02,   1.89989012e-03,
         8.45596349e-03,   1.77571691e-03,   5.29296645e-03,
         1.46383322e-03,   7.30609607e-03,   2.90823799e-02,
         1.24551261e-02,   2.33749215e-03,   4.15534842e-03,
         1.36516398e-02,   2.74379354e-05,   2.46374673e-04,
         1.29581803e-02,   1.29928460e-02,   7.14551985e-03,
         1.31173140e-02,   1.01666311e-02,   6.11544336e-03,
         1.87838121e-03,   1.71276187e-03,   1.00839193e-02,
         1.31327940e-02,   1.31058621e-02,   9.18667791e-03,
         4.99264002e-03,   1.52887584e-02,   5.76091755e-03,
         4.21488180e-03,   5.71513219e-03,   1.87825220e-03,
         7.76965944e-03,   7.91993741e-03,   2.20210311e-02,
         1.14566357e-02,   7.81565061e-03,   8.89906692e-03,
         3.39559481e-03,   6.12663769e-03,   3.91433987e-03,
         4.38470218e-03,   3.04399983e-03,   1.69848936e-02,
         1.10115648e-02,   3.41639536e-03])

Exercise C

In [ ]:
 

Exercise D

In [ ]:
 

Exercise E

In [ ]:
 

Exercise F

In [ ]:
 

Exercise G

In [ ]:
 

Exercise H

In [ ]:
 

Exercise I

In [ ]: